{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "RNN_Autocomplete",
      "version": "0.3.2",
      "provenance": [],
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/sanglee/BIML2019/blob/master/RNN_Autocomplete.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "metadata": {
        "id": "UdVnwLJ4jE4w",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "outputId": "d17c9ab5-786f-46ea-b8bb-c81e4b15ab9e"
      },
      "cell_type": "code",
      "source": [
        "\"\"\"\n",
        "  Autocompletion of the last character of words\n",
        "  Given the first three letters of a four-letter word, learn to predict the last letter\n",
        "\"\"\"\n",
        "\n",
        "import tensorflow as tf\n",
        "import keras\n",
        "import numpy as np\n",
        "\n",
        "\n",
        "vocab = ['a', 'b', 'c', 'd', 'e', 'f', 'g',\n",
        "         'h', 'i', 'j', 'k', 'l', 'm', 'n',\n",
        "         'o', 'p', 'q', 'r', 's', 't', 'u',\n",
        "         'v', 'w', 'x', 'y', 'z']\n",
        "\n",
        "# index array of characters in vocab\n",
        "v_map = {n: i for i, n in enumerate(vocab)}\n",
        "v_len = len(v_map)\n",
        "\n",
        "# training data (character sequences)\n",
        "# wor -> X, d -> Y\n",
        "# woo -> X, d -> Y\n",
        "training_data = ['word', 'wood', 'deep', 'dive', 'cold', 'cool', 'load', 'love', 'kiss', 'kind']\n",
        "test_data = ['wood', 'deep', 'cold', 'load', 'love', 'dear', 'dove', 'cell', 'life', 'keep']"
      ],
      "execution_count": 1,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Using TensorFlow backend.\n"
          ],
          "name": "stderr"
        }
      ]
    },
    {
      "metadata": {
        "id": "n7FaNCbNjNL0",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "def make_batch(seq_data):\n",
        "    \"\"\"Convert words into one-hot input sequences and integer class labels.\n",
        "\n",
        "    Every character of a word except the last is one-hot encoded to form the\n",
        "    input sequence; the vocabulary index of the last character is the label.\n",
        "\n",
        "    Args:\n",
        "        seq_data: iterable of words whose characters are all keys of v_map.\n",
        "\n",
        "    Returns:\n",
        "        A tuple (input_batch, target_batch). input_batch is a list holding one\n",
        "        array of shape (len(word) - 1, v_len) per word; target_batch is a list\n",
        "        holding one integer label per word.\n",
        "\n",
        "    Raises:\n",
        "        KeyError: if a word contains a character that is not in v_map.\n",
        "        IndexError: if a word is empty.\n",
        "    \"\"\"\n",
        "    # Row i of the identity matrix is the one-hot vector for index i.\n",
        "    # It is identical for every word, so build it once outside the loop.\n",
        "    one_hot = np.eye(v_len)\n",
        "\n",
        "    input_batch = []\n",
        "    target_batch = []\n",
        "\n",
        "    for seq in seq_data:\n",
        "        # Indices of every letter except the last one\n",
        "        # [22, 14, 17] [22, 14, 14] [3, 4, 4] [3, 8, 21] ...\n",
        "        char_ids = [v_map[ch] for ch in seq[:-1]]\n",
        "        # Index of the last letter of the word\n",
        "        # 3, 3, 15, 4, 3 ...\n",
        "        target = v_map[seq[-1]]\n",
        "\n",
        "        # One-hot encode the indices by selecting rows of the identity matrix;\n",
        "        # every row has v_len entries (abbreviated below)\n",
        "        # [0, 1, 2] ==>\n",
        "        # [[ 1.  0.  0.  0. ...  0.]\n",
        "        #  [ 0.  1.  0.  0. ...  0.]\n",
        "        #  [ 0.  0.  1.  0. ...  0.]]\n",
        "        input_batch.append(one_hot[char_ids])\n",
        "\n",
        "        # Labels stay as integer class ids (no one-hot encoding) because\n",
        "        # the loss function is sparse_softmax_cross_entropy_with_logits\n",
        "        target_batch.append(target)\n",
        "\n",
        "    return input_batch, target_batch\n"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "aU3CY4T4jRuG",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "# Optimisation settings\n",
        "learning_rate = 0.01  # step size handed to the optimizer\n",
        "total_epoch = 100     # number of training iterations\n",
        "\n",
        "# Network dimensions\n",
        "n_hidden = 10    # number of units in the LSTM cell\n",
        "n_step = 3       # the length of the input sequence\n",
        "n_input = v_len  # the size of each (one-hot) input vector\n",
        "n_class = v_len  # number of output classes, one per vocabulary character\n"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "EGBtiMrOjUfM",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 197
        },
        "outputId": "2b95ec26-d47e-4e2f-dfde-71c667abeed1"
      },
      "cell_type": "code",
      "source": [
        "\"\"\"\n",
        "  Phase 1: Create the computation graph\n",
        "\"\"\"\n",
        "# Start from an empty default graph so that this cell can be re-run safely.\n",
        "# Without the reset, a second run adds duplicate ops to the same graph and\n",
        "# dynamic_rnn fails because the LSTM variables already exist.\n",
        "tf.reset_default_graph()\n",
        "\n",
        "# X: batch of one-hot input sequences, Y: one integer class label per sequence\n",
        "X = tf.placeholder(tf.float32, [None, n_step, n_input])\n",
        "Y = tf.placeholder(tf.int32, [None])\n",
        "\n",
        "# Output projection from the LSTM output to one score per class\n",
        "W = tf.Variable(tf.random_normal([n_hidden, n_class]))\n",
        "b = tf.Variable(tf.random_normal([n_class]))\n",
        "\n",
        "# Create an LSTM cell\n",
        "cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden)\n",
        "\n",
        "# Create the RNN\n",
        "outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)\n",
        "# outputs : [batch_size, max_time, cell.output_size]\n",
        "\n",
        "# Keep only the output of the last time step: move the time axis to the\n",
        "# front ([max_time, batch_size, cell.output_size]) and take its last entry\n",
        "outputs = tf.transpose(outputs, [1, 0, 2])\n",
        "outputs = outputs[-1]\n",
        "# Unnormalised class scores (logits) : [batch_size, n_class]\n",
        "model = tf.matmul(outputs, W) + b\n",
        "\n",
        "# Mean cross-entropy between the logits and the integer labels\n",
        "cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=model, labels=Y))\n",
        "\n",
        "optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)"
      ],
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n",
            "Instructions for updating:\n",
            "Colocations handled automatically by placer.\n",
            "WARNING:tensorflow:From <ipython-input-4-02cd5eecc74e>:11: BasicLSTMCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.\n",
            "Instructions for updating:\n",
            "This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.\n",
            "WARNING:tensorflow:From <ipython-input-4-02cd5eecc74e>:14: dynamic_rnn (from tensorflow.python.ops.rnn) is deprecated and will be removed in a future version.\n",
            "Instructions for updating:\n",
            "Please use `keras.layers.RNN(cell)`, which is equivalent to this API\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "OK-7puFfjWSp",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1902
        },
        "outputId": "f82505ad-bbfe-468a-f9c0-3284e4c225f6"
      },
      "cell_type": "code",
      "source": [
        "\"\"\"\n",
        "  Phase 2: Train the model\n",
        "\"\"\"\n",
        "with tf.Session() as sess:\n",
        "    sess.run(tf.global_variables_initializer())\n",
        "\n",
        "    # The whole training set is fed as a single batch on every epoch\n",
        "    input_batch, target_batch = make_batch(training_data)\n",
        "\n",
        "    for epoch in range(total_epoch):\n",
        "        _, loss = sess.run([optimizer, cost],\n",
        "                           feed_dict={X: input_batch, Y: target_batch})\n",
        "\n",
        "        print('Epoch:', '%04d' % (epoch + 1),\n",
        "              'cost =', '{:.6f}'.format(loss))\n",
        "\n",
        "    print('Optimization finished')\n",
        "\n",
        "    # Make predictions: the predicted class is the index of the largest logit,\n",
        "    # accuracy is the fraction of predictions equal to the labels\n",
        "    prediction = tf.cast(tf.argmax(model, 1), tf.int32)\n",
        "    accuracy = tf.reduce_mean(tf.cast(tf.equal(prediction, Y), tf.float32))\n",
        "\n",
        "    # Report on the training words and on test_data as well; test_data\n",
        "    # contains words that are not in training_data, so its accuracy shows\n",
        "    # how the model does beyond the words it was fitted on\n",
        "    for label, seq_data in (('training', training_data), ('test', test_data)):\n",
        "        input_batch, target_batch = make_batch(seq_data)\n",
        "\n",
        "        predict, accuracy_val = sess.run([prediction, accuracy],\n",
        "                                         feed_dict={X: input_batch, Y: target_batch})\n",
        "\n",
        "        # Rebuild each word from its input prefix plus the predicted last letter;\n",
        "        # the prefix is word[:-1], the same slice make_batch encodes\n",
        "        predicted = [word[:-1] + vocab[char_id]\n",
        "                     for word, char_id in zip(seq_data, predict)]\n",
        "\n",
        "        print('\\n=== Predictions ({} data) ==='.format(label))\n",
        "        print('Input:', [w[:-1] + ' ' for w in seq_data])\n",
        "        print('Predicted:', predicted)\n",
        "        print('Accuracy:', accuracy_val)"
      ],
      "execution_count": 5,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Epoch: 0001 cost = 3.372146\n",
            "Epoch: 0002 cost = 3.301793\n",
            "Epoch: 0003 cost = 3.231223\n",
            "Epoch: 0004 cost = 3.159113\n",
            "Epoch: 0005 cost = 3.084567\n",
            "Epoch: 0006 cost = 3.006805\n",
            "Epoch: 0007 cost = 2.925208\n",
            "Epoch: 0008 cost = 2.839316\n",
            "Epoch: 0009 cost = 2.748787\n",
            "Epoch: 0010 cost = 2.653376\n",
            "Epoch: 0011 cost = 2.552949\n",
            "Epoch: 0012 cost = 2.447515\n",
            "Epoch: 0013 cost = 2.337271\n",
            "Epoch: 0014 cost = 2.222642\n",
            "Epoch: 0015 cost = 2.104293\n",
            "Epoch: 0016 cost = 1.983134\n",
            "Epoch: 0017 cost = 1.860328\n",
            "Epoch: 0018 cost = 1.737315\n",
            "Epoch: 0019 cost = 1.615802\n",
            "Epoch: 0020 cost = 1.497671\n",
            "Epoch: 0021 cost = 1.384820\n",
            "Epoch: 0022 cost = 1.278953\n",
            "Epoch: 0023 cost = 1.181361\n",
            "Epoch: 0024 cost = 1.092767\n",
            "Epoch: 0025 cost = 1.013268\n",
            "Epoch: 0026 cost = 0.942368\n",
            "Epoch: 0027 cost = 0.879080\n",
            "Epoch: 0028 cost = 0.822085\n",
            "Epoch: 0029 cost = 0.769956\n",
            "Epoch: 0030 cost = 0.721406\n",
            "Epoch: 0031 cost = 0.675461\n",
            "Epoch: 0032 cost = 0.631500\n",
            "Epoch: 0033 cost = 0.589197\n",
            "Epoch: 0034 cost = 0.548445\n",
            "Epoch: 0035 cost = 0.509295\n",
            "Epoch: 0036 cost = 0.471915\n",
            "Epoch: 0037 cost = 0.436560\n",
            "Epoch: 0038 cost = 0.403513\n",
            "Epoch: 0039 cost = 0.372989\n",
            "Epoch: 0040 cost = 0.345031\n",
            "Epoch: 0041 cost = 0.319465\n",
            "Epoch: 0042 cost = 0.296013\n",
            "Epoch: 0043 cost = 0.274452\n",
            "Epoch: 0044 cost = 0.254706\n",
            "Epoch: 0045 cost = 0.236805\n",
            "Epoch: 0046 cost = 0.220776\n",
            "Epoch: 0047 cost = 0.206537\n",
            "Epoch: 0048 cost = 0.193859\n",
            "Epoch: 0049 cost = 0.182405\n",
            "Epoch: 0050 cost = 0.171831\n",
            "Epoch: 0051 cost = 0.161870\n",
            "Epoch: 0052 cost = 0.152383\n",
            "Epoch: 0053 cost = 0.143349\n",
            "Epoch: 0054 cost = 0.134825\n",
            "Epoch: 0055 cost = 0.126891\n",
            "Epoch: 0056 cost = 0.119605\n",
            "Epoch: 0057 cost = 0.112962\n",
            "Epoch: 0058 cost = 0.106889\n",
            "Epoch: 0059 cost = 0.101274\n",
            "Epoch: 0060 cost = 0.096002\n",
            "Epoch: 0061 cost = 0.090991\n",
            "Epoch: 0062 cost = 0.086212\n",
            "Epoch: 0063 cost = 0.081670\n",
            "Epoch: 0064 cost = 0.077392\n",
            "Epoch: 0065 cost = 0.073405\n",
            "Epoch: 0066 cost = 0.069720\n",
            "Epoch: 0067 cost = 0.066335\n",
            "Epoch: 0068 cost = 0.063231\n",
            "Epoch: 0069 cost = 0.060380\n",
            "Epoch: 0070 cost = 0.057752\n",
            "Epoch: 0071 cost = 0.055316\n",
            "Epoch: 0072 cost = 0.053048\n",
            "Epoch: 0073 cost = 0.050926\n",
            "Epoch: 0074 cost = 0.048935\n",
            "Epoch: 0075 cost = 0.047062\n",
            "Epoch: 0076 cost = 0.045299\n",
            "Epoch: 0077 cost = 0.043637\n",
            "Epoch: 0078 cost = 0.042070\n",
            "Epoch: 0079 cost = 0.040590\n",
            "Epoch: 0080 cost = 0.039193\n",
            "Epoch: 0081 cost = 0.037875\n",
            "Epoch: 0082 cost = 0.036630\n",
            "Epoch: 0083 cost = 0.035456\n",
            "Epoch: 0084 cost = 0.034348\n",
            "Epoch: 0085 cost = 0.033300\n",
            "Epoch: 0086 cost = 0.032309\n",
            "Epoch: 0087 cost = 0.031368\n",
            "Epoch: 0088 cost = 0.030473\n",
            "Epoch: 0089 cost = 0.029619\n",
            "Epoch: 0090 cost = 0.028803\n",
            "Epoch: 0091 cost = 0.028023\n",
            "Epoch: 0092 cost = 0.027275\n",
            "Epoch: 0093 cost = 0.026560\n",
            "Epoch: 0094 cost = 0.025875\n",
            "Epoch: 0095 cost = 0.025220\n",
            "Epoch: 0096 cost = 0.024594\n",
            "Epoch: 0097 cost = 0.023994\n",
            "Epoch: 0098 cost = 0.023419\n",
            "Epoch: 0099 cost = 0.022868\n",
            "Epoch: 0100 cost = 0.022340\n",
            "Optimization finished\n",
            "\n",
            "=== Predictions ===\n",
            "Input: ['wor ', 'woo ', 'dee ', 'div ', 'col ', 'coo ', 'loa ', 'lov ', 'kis ', 'kin ']\n",
            "Predicted: ['word', 'wood', 'deep', 'dive', 'cold', 'cool', 'load', 'love', 'kiss', 'kind']\n",
            "Accuracy: 1.0\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "crRXEYzrkBWK",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        ""
      ],
      "execution_count": 0,
      "outputs": []
    }
  ]
}